/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.search.fields;

import org.opencms.search.CmsSearchManager;
import org.opencms.util.CmsStringUtil;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;

/**
 * An individual field configuration in a search index.<p>
 *
 * A field configuration holds the Lucene field name, the display name, the
 * store / index / tokenize / compress / excerpt flags, an optional analyzer,
 * a boost factor, an optional default value and the list of mappings that
 * provide the field content.<p>
 *
 * Two field configurations are considered equal if their names are equal,
 * see {@link #equals(Object)} and {@link #hashCode()}.<p>
 *
 * @since 7.0.0
 */
public class CmsSearchField {

    /** The default boost factor (1.0), used in case no boost has been set for a field. */
    public static final float BOOST_DEFAULT = 1.0f;

    /** Name of the field that contains the (optional) category of the document (hardcoded). */
    public static final String FIELD_CATEGORY = "category";

    /** Name of the field that usually contains the complete content of the document (optional). */
    public static final String FIELD_CONTENT = "content";

    /** Name of the field that contains the complete extracted content of the document as serialized object (hardcoded). */
    public static final String FIELD_CONTENT_BLOB = "contentblob";

    /** Name of the field that contains the document content date (hardcoded). */
    public static final String FIELD_DATE_CONTENT = "contentdate";

    /** Name of the field that contains the document creation date (hardcoded). */
    public static final String FIELD_DATE_CREATED = "created";

    /** Name of the field that contains the document creation date for fast lookup (hardcoded). */
    public static final String FIELD_DATE_CREATED_LOOKUP = "created_lookup";

    /** Name of the field that contains the document last modification date (hardcoded). */
    public static final String FIELD_DATE_LASTMODIFIED = "lastmodified";

    /** Name of the field that contains the document last modification date for fast lookup (hardcoded). */
    public static final String FIELD_DATE_LASTMODIFIED_LOOKUP = "lastmodified_lookup";

    /** Name of the field that usually contains the value of the "Description" property of the document (optional). */
    public static final String FIELD_DESCRIPTION = "description";

    /** Name of the field that usually contains the value of the "Keywords" property of the document (optional). */
    public static final String FIELD_KEYWORDS = "keywords";

    /**
     * Name of the field that usually combines all document "meta" information,
     * that is the values of the "Title", "Keywords" and "Description" properties (optional).
     */
    public static final String FIELD_META = "meta";

    /** Name of the field that contains all VFS parent folders of a document (hardcoded). */
    public static final String FIELD_PARENT_FOLDERS = "parent-folders";

    /** Name of the field that contains the document root path in the VFS (hardcoded). */
    public static final String FIELD_PATH = "path";

    /**
     * Name of the field that contains the (optional) document priority,
     * which can be used to boost the document in the result list (hardcoded).
     */
    public static final String FIELD_PRIORITY = "priority";

    /**
     * Name of the field that usually contains the value of the "Title" property of the document
     * as a keyword used for sorting and also for retrieving the title text (optional).<p>
     *
     * Please note: This field should NOT be used for searching. Use {@link #FIELD_TITLE_UNSTORED} instead.<p>
     */
    public static final String FIELD_TITLE = "title-key";

    /**
     * Name of the field that usually contains the value of the "Title" property of the document
     * in an analyzed form used for searching in the title (optional).
     */
    public static final String FIELD_TITLE_UNSTORED = "title";

    /** Name of the field that contains the type of the document. */
    public static final String FIELD_TYPE = "type";

    /** Value of the display name if the field should not be displayed. */
    public static final String IGNORE_DISPLAY_NAME = "-";

    /** Constant for the "compress" store setting. */
    public static final String STR_COMPRESS = "compress";

    /** Constant for the "no" index setting. */
    public static final String STR_NO = "no";

    /** Constant for the "tokenized" index setting. */
    public static final String STR_TOKENIZED = "tokenized";

    /** Constant for the "untokenized" index setting. */
    public static final String STR_UN_TOKENIZED = "untokenized";

    /** Constant for the "yes" store setting. */
    public static final String STR_YES = "yes";

    /** The special analyzer to use for this field. */
    private Analyzer m_analyzer;

    /** The boost factor of the field. */
    private float m_boost;

    /** Indicates if the content of this field is compressed. */
    private boolean m_compressed;

    /** A default value for the field in case the content does not provide the value. */
    private String m_defaultValue;

    /** Indicates if this field should be displayed. */
    private boolean m_displayed;

    /** The display name of the field. */
    private String m_displayName;

    /** The display name set from the configuration. */
    private String m_displayNameForConfiguration;

    /** Indicates if this field should be used for generating the excerpt. */
    private boolean m_excerpt;

    /** Indicates if the content of this field should be indexed. */
    private boolean m_indexed;

    /** The search field mappings. */
    private List<CmsSearchFieldMapping> m_mappings;

    /** The name of the field. */
    private String m_name;

    /** Indicates if the content of this field should be stored. */
    private boolean m_stored;

    /** Indicates if the content of this field should be tokenized. */
    private boolean m_tokenized;

    /**
     * Creates a new search field configuration.<p>
     *
     * The mapping list is initialized empty and the boost is set to {@link #BOOST_DEFAULT}.<p>
     */
    public CmsSearchField() {

        m_mappings = new ArrayList<CmsSearchFieldMapping>();
        m_boost = BOOST_DEFAULT;
    }

    /**
     * Creates a new search field configuration.<p>
     *
     * The field will be tokenized if it is indexed.
     * The field will not be in the excerpt.
     * The boost value is the default, that is no special boost is used.
     * There is no default value.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isIndexed controls if the field is indexed and tokenized, see {@link #setIndexed(boolean)}
     */
    public CmsSearchField(String name, String displayName, boolean isStored, boolean isIndexed) {

        this(name, displayName, isStored, isIndexed, isIndexed, false, BOOST_DEFAULT, null);
    }

    /**
     * Creates a new search field configuration.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isCompressed controls if the field is compressed, see {@link #setCompressed(boolean)}
     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
     * @param isTokenized controls if the field is tokenized, see {@link #setTokenized(boolean)}
     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
     * @param analyzer the Lucene analyzer to use for this field
     * @param boost the boost factor for the field, see {@link #setBoost(float)}
     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
     */
    public CmsSearchField(
        String name,
        String displayName,
        boolean isStored,
        boolean isCompressed,
        boolean isIndexed,
        boolean isTokenized,
        boolean isInExcerpt,
        Analyzer analyzer,
        float boost,
        String defaultValue) {

        this();
        setDisplayName(displayName);
        setName(name);
        // the order matters: setCompressed(true) switches the stored flag on again
        setStored(isStored);
        setCompressed(isCompressed);
        setIndexed(isIndexed);
        setTokenized(isTokenized);
        setInExcerpt(isInExcerpt);
        setAnalyzer(analyzer);
        setBoost(boost);
        setDefaultValue(defaultValue);
    }

    /**
     * Creates a new search field configuration.<p>
     *
     * The field is not compressed and no special analyzer is used.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
     * @param isTokenized controls if the field is tokenized, see {@link #setTokenized(boolean)}
     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
     * @param boost the boost factor for the field, see {@link #setBoost(float)}
     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
     */
    public CmsSearchField(
        String name,
        String displayName,
        boolean isStored,
        boolean isIndexed,
        boolean isTokenized,
        boolean isInExcerpt,
        float boost,
        String defaultValue) {

        this(name, displayName, isStored, false, isIndexed, isTokenized, isInExcerpt, null, boost, defaultValue);
    }

    /**
     * Adds a new field mapping to the internal list of mappings.<p>
     *
     * @param mapping the mapping to add
     */
    public void addMapping(CmsSearchFieldMapping mapping) {

        m_mappings.add(mapping);
    }

    /**
     * Creates a Lucene field from the configuration and the provided content.<p>
     *
     * The configured name of the field as provided by {@link #getName()} is used.<p>
     *
     * If no valid content is provided (that is the content is either <code>null</code> or
     * only whitespace), the configured default value is used instead, see {@link #getDefaultValue()}.
     * If there is no default value either, then no field is created and <code>null</code> is returned.<p>
     *
     * @param content the content to create the field with
     *
     * @return a Lucene field created from the configuration and the provided content,
     *      or <code>null</code> if neither content nor default value are available
     */
    public Field createField(String content) {

        return createField(getName(), content);
    }

    /**
     * Creates a Lucene field with the given name from the configuration and the provided content.<p>
     *
     * If no valid content is provided (that is the content is either <code>null</code> or
     * only whitespace), the configured default value is used instead, see {@link #getDefaultValue()}.
     * If there is no default value either, then no field is created and <code>null</code> is returned.<p>
     *
     * @param name the name of the field to create
     * @param content the content to create the field with
     *
     * @return a Lucene field with the given name from the configuration and the provided content,
     *      or <code>null</code> if neither content nor default value are available
     */
    public Field createField(String name, String content) {

        if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) {
            // fall back to the default value, which may be null
            content = getDefaultValue();
        }
        if (content == null) {
            return null;
        }

        Index index = Field.Index.NO;
        if (isIndexed()) {
            index = isTokenizedAndIndexed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED;
        }

        // a compressed field is always treated as stored
        Field.Store store = (isStored() || isCompressed()) ? Field.Store.YES : Field.Store.NO;

        Field result = new Field(name, content, store, index);
        if (getBoost() != BOOST_DEFAULT) {
            // only touch the Lucene boost if a special boost has been configured
            result.setBoost(getBoost());
        }
        return result;
    }

    /**
     * Two fields are equal if the name of the Lucene field is equal.<p>
     *
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {

        if (obj instanceof CmsSearchField) {
            return CmsStringUtil.isEqual(m_name, ((CmsSearchField)obj).m_name);
        }
        return false;
    }

    /**
     * Returns the analyzer used for this field.<p>
     *
     * @return the analyzer used for this field, may be <code>null</code> if none has been set
     */
    public Analyzer getAnalyzer() {

        return m_analyzer;
    }

    /**
     * Returns the boost factor of this field.<p>
     *
     * The boost factor is a Lucene function that controls the "importance" of a field in the
     * search result ranking. The default is <code>1.0</code>. A lower boost factor will make the field
     * less important for the result ranking, a higher value will make it more important.<p>
     *
     * @return the boost factor of this field
     */
    public float getBoost() {

        return m_boost;
    }

    /**
     * Returns the boost factor of this field as String value for display use.<p>
     *
     * In case the boost is the default {@link #BOOST_DEFAULT}, <code>null</code> is returned.<p>
     *
     * @return the boost factor of this field as String value for display use
     */
    public String getBoostDisplay() {

        if (m_boost == BOOST_DEFAULT) {
            return null;
        }
        return String.valueOf(m_boost);
    }

    /**
     * Returns the default value to use if no content for this field was collected.<p>
     *
     * In case no default is configured, <code>null</code> is returned.<p>
     *
     * @return the default value to use if no content for this field was collected
     */
    public String getDefaultValue() {

        return m_defaultValue;
    }

    /**
     * Returns the display name of the field.<p>
     *
     * If the field is not displayed, {@link #IGNORE_DISPLAY_NAME} is returned.
     * If no display name has been set, the field name is returned.<p>
     *
     * @return the display name of the field
     */
    public String getDisplayName() {

        if (!isDisplayed()) {
            return IGNORE_DISPLAY_NAME;
        }
        return (m_displayName == null) ? m_name : m_displayName;
    }

    /**
     * Returns the display name as set from the configuration.<p>
     *
     * @return the display name as set from the configuration
     */
    public String getDisplayNameForConfiguration() {

        return m_displayNameForConfiguration;
    }

    /**
     * Returns the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index.<p>
     *
     * The result is <code>"true"</code> if the field is indexed and tokenized,
     * {@link #STR_UN_TOKENIZED} if the field is indexed but not tokenized,
     * and <code>"false"</code> if the field is not indexed.<p>
     *
     * @return the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index
     *
     * @see #isTokenizedAndIndexed()
     * @see #isIndexed()
     */
    public String getIndexed() {

        if (isTokenizedAndIndexed()) {
            return String.valueOf(true);
        }
        if (isIndexed()) {
            return STR_UN_TOKENIZED;
        }
        return String.valueOf(false);
    }

    /**
     * Returns the mappings for this field.<p>
     *
     * The internal list is returned directly, not a copy.<p>
     *
     * @return the mappings for this field
     */
    public List<CmsSearchFieldMapping> getMappings() {

        return m_mappings;
    }

    /**
     * Returns the name of this field in the Lucene search index.<p>
     *
     * @return the name of this field in the Lucene search index
     */
    public String getName() {

        return m_name;
    }

    /**
     * The hash code for a field is based only on the field name.<p>
     *
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {

        return (m_name == null) ? 41 : m_name.hashCode();
    }

    /**
     * Returns <code>true</code> if the content of this field is compressed.<p>
     *
     * Enabling compression with {@link #setCompressed(boolean)} also switches on the
     * stored flag, and {@link #createField(String, String)} always stores a compressed field.<p>
     *
     * @return <code>true</code> if the content of this field is compressed
     */
    public boolean isCompressed() {

        return m_compressed;
    }

    /**
     * Returns <code>true</code> if the field should be displayed.<p>
     *
     * @return <code>true</code> if the field should be displayed, otherwise <code>false</code>
     */
    public boolean isDisplayed() {

        return m_displayed;
    }

    /**
     * Returns <code>true</code> if the content of this field is indexed in the Lucene index.<p>
     *
     * @return <code>true</code> if the content of this field is indexed in the Lucene index
     */
    public boolean isIndexed() {

        return m_indexed;
    }

    /**
     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
     *
     * This returns the plain excerpt flag, no matter if the field is stored or not.<p>
     *
     * @return <code>true</code> if this fields content is used in the search result excerpt
     *
     * @see #isInExcerptAndStored()
     * @see #isStored()
     */
    public boolean isInExcerpt() {

        return m_excerpt;
    }

    /**
     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
     *
     * A field can only be used in the excerpt if it is stored, see {@link #isStored()}.<p>
     *
     * @return <code>true</code> if this fields content is used in the search result excerpt
     *
     * @see #isStored()
     */
    public boolean isInExcerptAndStored() {

        return m_excerpt && m_stored;
    }

    /**
     * Returns <code>true</code> if the content of this field is stored in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store}
     * for the concept behind stored and unstored fields.<p>
     *
     * @return <code>true</code> if the content of this field is stored in the Lucene index
     *
     * @see #isTokenizedAndIndexed()
     */
    public boolean isStored() {

        return m_stored;
    }

    /**
     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
     *
     * This returns the plain tokenized flag, no matter if the field is indexed or not.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Index}
     * for the concept behind tokenized and untokenized fields.<p>
     *
     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
     *
     * @see #isTokenizedAndIndexed()
     */
    public boolean isTokenized() {

        return m_tokenized;
    }

    /**
     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
     *
     * A field can only be tokenized if it is also indexed, see {@link #isIndexed()}.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Index}
     * for the concept behind tokenized and untokenized fields.<p>
     *
     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
     *
     * @see #isStored()
     * @see #isIndexed()
     */
    public boolean isTokenizedAndIndexed() {

        return m_tokenized && m_indexed;
    }

    /**
     * Sets the analyzer used for this field.<p>
     *
     * @param analyzer the analyzer to set
     */
    public void setAnalyzer(Analyzer analyzer) {

        m_analyzer = analyzer;
    }

    /**
     * Sets the analyzer used for this field.<p>
     *
     * The parameter must be the name of a Lucene {@link Analyzer} class, the instance
     * is obtained from {@link CmsSearchManager#getAnalyzer(String)}.<p>
     *
     * @param analyzer the analyzer class name to set
     *
     * @throws Exception in case of problems creating the analyzer class instance
     */
    public void setAnalyzer(String analyzer) throws Exception {

        setAnalyzer(CmsSearchManager.getAnalyzer(analyzer));
    }

    /**
     * Sets the boost factor for this field.<p>
     *
     * The boost factor is a Lucene function that controls the "importance" of a field in the
     * search result ranking. The default is <code>1.0</code>. A lower boost factor will make the field
     * less important for the result ranking, a higher value will make it more important.<p>
     *
     * Negative values are not allowed and are replaced by <code>0.0</code>.<p>
     *
     * <b>Use with caution:</b> You should only use this if you fully understand the concept behind
     * Lucene boost factors. Otherwise it is likely that your result rankings will be worse than with
     * the default values.<p>
     *
     * @param boost the boost factor to set
     */
    public void setBoost(float boost) {

        if (boost < 0.0f) {
            boost = 0.0f;
        }
        m_boost = boost;
    }

    /**
     * Sets the boost factor for this field from a String value.<p>
     *
     * If the value is <code>null</code> or not a valid number, the default boost
     * {@link #BOOST_DEFAULT} is used. Accepting <code>null</code> makes this the
     * inverse of {@link #getBoostDisplay()}, which returns <code>null</code> for the default boost.<p>
     *
     * @param boost the boost factor to set
     *
     * @see #setBoost(float)
     */
    public void setBoost(String boost) {

        float value = BOOST_DEFAULT;
        if (boost != null) {
            try {
                value = Float.parseFloat(boost);
            } catch (NumberFormatException e) {
                // invalid number format, use default boost factor
                value = BOOST_DEFAULT;
            }
        }
        setBoost(value);
    }

    /**
     * Sets the boost factor of this field (only for display use).<p>
     *
     * @param boost the boost factor to set
     *
     * @see #setBoost(String)
     */
    public void setBoostDisplay(String boost) {

        setBoost(boost);
    }

    /**
     * Controls if this field value will be stored compressed or not.<p>
     *
     * If this is set to <code>true</code>, the value for {@link #isStored()} will also
     * be set to <code>true</code>, since compressed fields are always stored.
     * Setting this to <code>false</code> leaves the stored flag untouched.<p>
     *
     * @param compressed if <code>true</code>, the field value will be stored compressed
     */
    public void setCompressed(boolean compressed) {

        m_compressed = compressed;
        if (compressed) {
            setStored(true);
        }
    }

    /**
     * Sets the default value to use if no content for this field was collected.<p>
     *
     * The value is trimmed. An empty or whitespace only value removes the default.<p>
     *
     * @param defaultValue the default value to set
     */
    public void setDefaultValue(String defaultValue) {

        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(defaultValue)) {
            m_defaultValue = defaultValue.trim();
        } else {
            m_defaultValue = null;
        }
    }

    /**
     * Controls if the field is displayed or not.<p>
     *
     * @param displayed if <code>true</code> the field is displayed
     */
    public void setDisplayed(boolean displayed) {

        m_displayed = displayed;
    }

    /**
     * Sets the display name.<p>
     *
     * If the given name is empty or equals {@link #IGNORE_DISPLAY_NAME}, the field is not displayed
     * and the display name is removed. Otherwise the name is used as display name as well as
     * display name for the configuration, and the field is displayed.<p>
     *
     * @param displayName the display name to set
     */
    public void setDisplayName(String displayName) {

        if (CmsStringUtil.isEmpty(displayName) || IGNORE_DISPLAY_NAME.equals(displayName)) {
            m_displayName = null;
            setDisplayed(false);
        } else {
            m_displayName = displayName;
            m_displayNameForConfiguration = displayName;
            setDisplayed(true);
        }
    }

    /**
     * Sets the display name from the configuration.<p>
     *
     * The value is also passed on to {@link #setDisplayName(String)}.<p>
     *
     * @param displayNameForConfiguration the display name from the configuration to set
     */
    public void setDisplayNameForConfiguration(String displayNameForConfiguration) {

        m_displayNameForConfiguration = displayNameForConfiguration;
        setDisplayName(displayNameForConfiguration);
    }

    /**
     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index.<p>
     *
     * @param indexed if <code>true</code>, then the field content is indexed
     *
     * @see #setTokenized(boolean)
     */
    public void setIndexed(boolean indexed) {

        m_indexed = indexed;
    }

    /**
     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index from a String parameter.<p>
     *
     * This sets the values for {@link #isIndexed()} as well as {@link #isTokenizedAndIndexed()}.<p>
     *
     * The parameter is trimmed and compared case insensitive. It can have the following values:
     * <ul>
     * <li><b>"true"</b> or <b>"tokenized"</b>: The field is indexed and tokenized.
     * <li><b>"false"</b> or <b>"no"</b>: The field is not indexed and not tokenized.
     * <li><b>"untokenized"</b>: The field is indexed but not tokenized.
     * </ul>
     *
     * A <code>null</code> or any other value is treated like <b>"false"</b>.<p>
     *
     * @param indexed the index setting to use
     *
     * @see #setIndexed(boolean)
     * @see #setTokenized(boolean)
     */
    public void setIndexed(String indexed) {

        boolean isIndexed = false;
        boolean isTokenized = false;
        if (indexed != null) {
            // use a fixed locale, the default locale may map characters unexpectedly (e.g. Turkish "I")
            indexed = indexed.trim().toLowerCase(Locale.ENGLISH);
            if (STR_TOKENIZED.equals(indexed)) {
                isIndexed = true;
                isTokenized = true;
            } else if (STR_UN_TOKENIZED.equals(indexed)) {
                isIndexed = true;
            } else if (STR_NO.equals(indexed)) {
                // "no", both values will be false
            } else {
                // only "true" or "false" remain
                isIndexed = Boolean.valueOf(indexed).booleanValue();
                isTokenized = isIndexed;
            }
        }
        setIndexed(isIndexed);
        setTokenized(isTokenized);
    }

    /**
     * Controls if this fields content is used in the search result excerpt.<p>
     *
     * @param excerpt if <code>true</code>, then this fields content is used in the search excerpt
     */
    public void setInExcerpt(boolean excerpt) {

        m_excerpt = excerpt;
    }

    /**
     * Controls if this fields content is used in the search result excerpt.<p>
     *
     * @param excerpt if <code>"true"</code>, then this fields content is used in the search excerpt
     *
     * @see #setInExcerpt(boolean)
     */
    public void setInExcerpt(String excerpt) {

        setInExcerpt(Boolean.valueOf(String.valueOf(excerpt)).booleanValue());
    }

    /**
     * Sets the name of this field in the Lucene search index.<p>
     *
     * @param name the name to set
     */
    public void setName(String name) {

        m_name = name;
    }

    /**
     * Controls if the content of this field is stored in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store}
     * for the concept behind stored and unstored fields.<p>
     *
     * @param stored if <code>true</code>, then the field content is stored
     *
     * @see #setTokenized(boolean)
     */
    public void setStored(boolean stored) {

        m_stored = stored;
    }

    /**
     * Controls if the content of this field is stored in the Lucene index from a String parameter.<p>
     *
     * The parameter is trimmed and compared case insensitive. It can have the following values:
     * <ul>
     * <li><b>"compress"</b>: The field is stored and compressed.
     * <li><b>"true"</b> or <b>"yes"</b>: The field is stored but not compressed.
     * </ul>
     *
     * A <code>null</code> or any other value means the field is neither stored nor compressed.<p>
     *
     * @param stored the store setting to use
     *
     * @see #setStored(boolean)
     * @see #setCompressed(boolean)
     */
    public void setStored(String stored) {

        boolean isStored = false;
        boolean isCompressed = false;
        if (stored != null) {
            // use a fixed locale, the default locale may map characters unexpectedly (e.g. Turkish "I")
            stored = stored.trim().toLowerCase(Locale.ENGLISH);
            if (STR_COMPRESS.equals(stored)) {
                isCompressed = true;
                isStored = true;
            } else if (STR_YES.equals(stored)) {
                // "yes", value will be stored but not compressed
                isStored = true;
            } else {
                // only "true" or "false" remain
                isStored = Boolean.valueOf(stored).booleanValue();
            }
        }
        setStored(isStored);
        setCompressed(isCompressed);
    }

    /**
     * Controls if the content of this field is tokenized in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Index}
     * for the concept behind tokenized and untokenized fields.<p>
     *
     * @param tokenized if <code>true</code>, then the field content is tokenized
     *
     * @see #setStored(boolean)
     */
    public void setTokenized(boolean tokenized) {

        m_tokenized = tokenized;
    }
}